##########################
# Collect candidate CFscores
##########################

# load packages
library(dplyr)

# Read the recipients file (looked up relative to the working directory)
candidates <- read.csv(
  file = "dime_recipients_all_1979_2014.csv",
  header = TRUE
)


###

# 1. Get gubernatorial CFscores

# Subset data to 2012 cycle. `candidates` is overwritten here, so the House
# section further down also works from the 2012-only rows.
candidates <- subset(candidates, cycle == 2012)

# Identify gubernatorial candidates: general-election candidates with party
# code 100 or 200 whose NIMSP office is not lieutenant governor.
# subset() is used rather than logical `[` indexing: with `[`, every NA in
# the condition becomes an all-NA row in the result, and those junk rows
# would be carried into dem_gub. subset() treats NA as "no match", which is
# also how the GOP path below behaves.
gub <- subset(candidates,
              seat == "state:governor" &
                ran.general == 1 &
                (party == "100" | party == "200") &
                nimsp.office != "LIEUTENANT GOVERNOR")

# Create Dem df (party code 100)
dem_gub <- subset(gub, party == "100")

# remove WI recall primary losers and MS primary runoff loser
dem_gub <- subset(dem_gub, lname != "falk" & lname != "vinehout")
dem_gub <- subset(dem_gub, lname != "luckett")

# Create GOP df (party code 200)
gop_gub <- subset(gub, party == "200")

# Add VT candidate, remove UT Lt Gov and convention losers.
# All UT rows are dropped first, then the single UT record UT7097 is pulled
# back in from the full 2012 candidate table.
gop_gub <- rbind(gop_gub, subset(candidates, Cand.ID == "VT7224"))
gop_gub <- subset(gop_gub, state != "UT")
gop_gub <- rbind(gop_gub, subset(candidates, Cand.ID == "UT7097"))


###

# 2. Get US House CFscores

# a. Identify general election house candidates: party code 100 or 200,
#    ran in the general, and a non-empty general-election vote share.
house <- subset(candidates, seat == 'federal:house' &
                  ran.general == 1 &
                  (party == '100' | party == '200') &
                  gen.elect.pct != '')

# b. Identify districts with too many candidates (more than two rows)
district_counts <- table(house$district)
crowded_districts <- names(district_counts[district_counts > 2])
in_crowded <- house$district %in% crowded_districts
problem_dists <- house[in_crowded, ]
house <- house[!in_crowded, ]

# Remove extra candidates from these districts: first rows without a
# recipient CFscore, then specific records by name / ICPSR id.
problem_dists <- subset(problem_dists, complete.cases(recipient.cfscore))
problem_dists <- subset(problem_dists, name != "sanchez, linda")
problem_dists <- subset(problem_dists, name != "corey, matthew m mr")
problem_dists <- subset(problem_dists, name != "richard, ron")
problem_dists <- subset(problem_dists, name != "scaturro, frank")
problem_dists <- subset(problem_dists, ICPSR != "295702012")

# Piece dataset together
house <- rbind(house, problem_dists)

# Fix late redistricting change.
# This relabel must be applied to `house` itself: writing to
# `candidates$district` at this point would have no effect, because `house`
# was already copied out of `candidates` above and `candidates` is discarded
# at the end of the script.
if (is.factor(house$district)) {
  # make sure the new label is a valid level so the assignment cannot yield NA
  levels(house$district) <- union(levels(house$district), "NY11")
}
house$district[house$name %in% c("grimm, michael", "murphy, mark")] <- "NY11"

# c. Create R and D dfs
dem_house <- subset(house, party == '100')
gop_house <- subset(house, party == '200')


###

# 3. Garbage collection and save

# Drop every intermediate object, keeping only the four result data frames.
rm(list = setdiff(ls(), c("dem_gub", "gop_gub", "gop_house", "dem_house")))

# Save exactly the four result objects. save.image() would instead write
# everything in the global environment, including hidden objects such as
# .Random.seed that ls()/rm() above do not touch, and loading the file would
# then overwrite them in the loading session.
save(dem_gub, gop_gub, gop_house, dem_house, file = "cand_cfscores.RData")
